/*

Copyright (C) 2000,2001  Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI)

This file is part of GIZA++ ( extension of GIZA ).

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.

*/
#ifndef _d5tables_h_define
#define _d5tables_h_define
#include <cmath>
#include <sstream>
#include "D4Tables.h"

// Constant added to every count when d5model::normalizeTable() converts
// counts into probabilities.
extern float d5modelsmooth_countoffset;
// Interpolation weight used by d5model::getProb_first()/getProb_bigger():
// factor/vacancies_total is mixed with (1-factor) times the stored probability.
extern float d5modelsmooth_factor;

// Uniform probability over the vacancies.
// NOTE: this expands to an expression that reads a variable named
// `vacancies_total` from whatever scope the macro is used in.
#define UNSEENPROB (1.0/vacancies_total)

class d5model
{
private:
  typedef Vector < pair < COUNT,PROB > >Vpff;
  map< m4_key,Vpff,compare1 > D1;
  map< m4_key,Vpff,compareb1 > Db1;
public:
  d4model&d4m;
  WordClasses* ewordclasses;
  WordClasses* fwordclasses;
  template<class MAPPER>
  void makeWordClasses(const MAPPER&m1,const MAPPER&m2,string efile,string ffile
                       , const vcbList& elist,
                       const vcbList& flist) {
    ifstream estrm(efile.c_str()),fstrm(ffile.c_str());
    if( !estrm )
      cerr << "ERROR: can not read classes from " << efile << endl;
    else
      ewordclasses->read(estrm,m1,elist);
    if( !fstrm )
      cerr << "ERROR: can not read classes from " << ffile << endl;
    else
      fwordclasses->read(fstrm,m2,flist);
  }
  d5model (d4model&_d4m)
    :D1 (compare1(M5_Dependencies)), Db1 (compareb1(M5_Dependencies)),d4m(_d4m),
     ewordclasses(_d4m.ewordclasses),fwordclasses(_d4m.fwordclasses)
  {}
  COUNT &getCountRef_first (PositionIndex vacancies_j,
                            PositionIndex vacancies_jp, int F,
                            PositionIndex l, PositionIndex m,
                            PositionIndex vacancies_total) {
    massert(vacancies_j>0);
    massert(vacancies_total>0);
    //massert(vacancies_jp<=vacancies_total);
    massert(vacancies_j <=vacancies_total);
    massert(vacancies_total<=m);
    m4_key key(M5_Dependencies,l,m,F,0,0,vacancies_jp,vacancies_total);
    map<m4_key,Vpff,compare1 >::iterator p=D1.find(key);
    if(p==D1.end())
      p=D1.insert(make_pair(key,Vpff(vacancies_total+1,make_pair(0,UNSEENPROB)))).first; // !!! constrain length
    massert(p!=D1.end());
    return (p->second)[vacancies_j].first;
  }
  COUNT &getCountRef_bigger (PositionIndex vacancies_j,
                             PositionIndex vacancies_jp, int F,
                             PositionIndex l, PositionIndex m,
                             PositionIndex vacancies_total) {
    massert(vacancies_j>0);
    massert(vacancies_total>0);
    massert (vacancies_jp <= vacancies_j);
    massert (vacancies_j-vacancies_jp <= vacancies_total);
    m4_key key(M5_Dependencies,l,m,F,0,0,-1,vacancies_total);
    map<m4_key,Vpff,compareb1 >::iterator p=Db1.find(key);
    if(p==Db1.end())
      p=Db1.insert(make_pair(key,Vpff(vacancies_total+1,make_pair(0,UNSEENPROB)))).first; // !!! constrain length
    massert(p!=Db1.end());
    return (p->second)[vacancies_j - vacancies_jp].first;
  }
  PROB getProb_first (PositionIndex vacancies_j, PositionIndex vacancies_jp,
                      int F, PositionIndex l, PositionIndex m,
                      PositionIndex vacancies_total) const {
    massert(vacancies_j>0);
    massert(vacancies_total>0);
    //massert(vacancies_jp<=vacancies_total);
    massert(vacancies_j <=vacancies_total);
    massert(vacancies_total<=m);
    m4_key key(M5_Dependencies,l,m,F,0,0,vacancies_jp,vacancies_total);
    map<m4_key,Vpff,compare1 >::const_iterator p=D1.find(key);
    if( p==D1.end() )
      return UNSEENPROB;
    else
      return max(PROB_SMOOTH,d5modelsmooth_factor/(vacancies_total)+(1-d5modelsmooth_factor)*(p->second)[vacancies_j].second);
  }
  PROB getProb_bigger (PositionIndex vacancies_j, PositionIndex vacancies_jp,
                       int F, PositionIndex l, PositionIndex m,
                       PositionIndex vacancies_total) const {
    massert(vacancies_j>0);
    massert(vacancies_total>0);
    massert (vacancies_jp <= vacancies_j);
    massert (vacancies_j-vacancies_jp <= vacancies_total);
    m4_key key(M5_Dependencies,l,m,F,0,0,-1,vacancies_total);
    map<m4_key,Vpff,compareb1 >::const_iterator p=Db1.find(key);
    if(p==Db1.end())
      return UNSEENPROB;
    else
      return max(PROB_SMOOTH,d5modelsmooth_factor/(vacancies_total)+(1-d5modelsmooth_factor)*(p->second)[vacancies_j - vacancies_jp].second);
  }
  void normalizeTable () {
    int nParams=0;
    for(map<m4_key,Vpff,compare1 >::iterator i=D1.begin(); i!=D1.end(); ++i) {
      Vpff&d1=i->second;
      COUNT sum=0.0;
      for(PositionIndex i=0; i<d1.size(); i++)
        sum+=d1[i].first+d5modelsmooth_countoffset;
      for(PositionIndex i=0; i<d1.size(); i++) {
        d1[i].second=sum?((d1[i].first+d5modelsmooth_countoffset)/sum):(1.0/d1.size());
        nParams++;
      }
    }
    for(map<m4_key,Vpff,compareb1 >::iterator i=Db1.begin(); i!=Db1.end(); ++i) {
      Vpff&db1=i->second;
      double sum=0.0;
      for(PositionIndex i=0; i<db1.size(); i++)
        sum+=db1[i].first+d5modelsmooth_countoffset;
      for(PositionIndex i=0; i<db1.size(); i++) {
        db1[i].second=sum?((db1[i].first+d5modelsmooth_countoffset)/sum):(1.0/db1.size());
        nParams++;
      }
    }
    cout << "D5 table contains " << nParams << " parameters.\n";
  }

  friend ostream&operator<<(ostream&out,d5model&d5m) {
    out << "# Translation tables for Model 5 .\n";
    out << "# Table for head of cept.\n";
    for(map<m4_key,Vpff,compare1 >::const_iterator i=d5m.D1.begin(); i!=d5m.D1.end(); ++i) {
      const Vpff&d1=i->second;
      COUNT sum=0.0;
      for(PositionIndex ii=0; ii<d1.size(); ii++)sum+=d1[ii].first;
      if ( sum ) {
        for(unsigned ii=0; ii<d1.size(); ii++) {
          print1_m5(out,i->first,*d5m.ewordclasses,*d5m.fwordclasses);
          out << (int)(ii) << ' ' << d1[ii].second  << ' ' << d1[ii].first << '\n';
        }
        out << endl;
      }
    }
    out << "# Table for non-head of cept.\n";
    for(map<m4_key,Vpff,compareb1 >::const_iterator i=d5m.Db1.begin(); i!=d5m.Db1.end(); ++i) {
      const Vpff&db1=i->second;
      double sum=0.0;
      for(PositionIndex ii=0; ii<db1.size(); ++ii)sum+=db1[ii].first;
      if( sum ) {
        for(unsigned ii=0; ii<db1.size(); ii++) {
          printb1_m5(out,i->first,*d5m.fwordclasses);
          out << (int)(ii) << ' ' << db1[ii].second << ' ' << db1[ii].first << '\n';
        }
        out << endl;
      }
    }
    return out;
  }
  void readProbTable(const char*x) {
    ifstream f(x);
    string l;
    while(getline(f,l)) {
      if(l.length()&&l[0]=='#')
        continue;
      istrstream is(l.c_str());
      string E,F;
      int v1,v2,ii;
      double prob,count;
      if(is>>E>>F>>v1>>v2>>ii>>prob>>count) {
        //cerr << "Read: " << E << " " << F << " " << v1 << " " << v2 << " " << prob<< endl;
        if( count>0 )
          if( E=="-1")
            getCountRef_bigger(ii,0,(*fwordclasses)(F),1000,1000,v2)+=count;
          else
            getCountRef_first(ii,v1,(*fwordclasses)(F),1000,1000,v2)+=count;
      }
    }
    normalizeTable();
    //ofstream of("M5FILE");
    //of << (*this);
  }
  void clear() {
    for(map<m4_key,Vpff,compare1 >::iterator i=D1.begin(); i!=D1.end(); ++i) {
      Vpff&d1=i->second;
      for(PositionIndex i=0; i<d1.size(); i++)
        d1[i].first=0.0;
    }
    for(map<m4_key,Vpff,compareb1 >::iterator i=Db1.begin(); i!=Db1.end(); ++i) {
      Vpff&db1=i->second;
      for(PositionIndex i=0; i<db1.size(); i++)
        db1[i].first=0.0;
    }
  }
};

#endif



